pkgs <- c("tidyverse", "tidylog", "sp", "raster", "devtools", "RCurl", "sdmTMB", "purrr", "terra", "ncdf4", "chron", "ncdf4", "tidyterra")
if(length(setdiff(pkgs, rownames(installed.packages()))) > 0){
install.packages(setdiff(pkgs, rownames(installed.packages())), dependencies = T)
}
invisible(lapply(pkgs, library, character.only = T))
# Source code for map plots
devtools::source_url("https://raw.githubusercontent.com/VThunell/Lammska_cod-fr/main/R/functions/map-plot.R")
# Set path
home <- here::here()Add covariates to stomach data
Libraries
Read data
dat <- read_csv(paste0(home, "/data/clean/stomachs3.csv"))
glimpse(dat)Rows: 136,466
Columns: 28
$ pred_ID <chr> "LV_1968_9_19_1_37G4_52", "LV_1973_5_11_15_37G5_100…
$ herring <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ saduria <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.54, 0.0…
$ sprat <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ other <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ other_invert <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.34, 0.0…
$ other_fish <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ fr_tot <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.0…
$ fr_sad <dbl> 0.000000000, 0.000000000, 0.000000000, 0.000000000,…
$ fr_spr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ fr_her <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ fr_other <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ fr_other_invert <dbl> 0.000000000, 0.000000000, 0.000000000, 0.000000000,…
$ fr_other_fish <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ year <dbl> 1968, 1973, 1973, 1973, 1973, 1983, 1983, 1966, 196…
$ month <dbl> 9, 5, 5, 5, 5, 12, 12, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6…
$ day <dbl> 19, 11, 11, 11, 11, 16, 16, 22, 22, 22, 22, 19, 19,…
$ day_of_year <dbl> 263, 131, 131, 131, 131, 350, 350, 22, 22, 22, 22, …
$ pred_weight <dbl> 156.25, 49.13, 49.13, 58.32, 40.96, 2160.00, 4565.3…
$ pred_length <dbl> 25, 17, 17, 18, 16, 60, 77, 18, 25, 19, 19, 31, 33,…
$ pred_weight_source <chr> "estimated", "estimated", "estimated", "estimated",…
$ lat <dbl> 54.25, 54.25, 54.25, 54.25, 54.25, 54.25, 54.25, 54…
$ lon <dbl> 14.5, 15.5, 15.5, 15.5, 15.5, 15.5, 15.5, 19.5, 19.…
$ ices_rect <chr> "37G4", "37G5", "37G5", "37G5", "37G5", "37G5", "37…
$ X <dbl> 467.4220, 532.5780, 532.5780, 532.5780, 532.5780, 5…
$ Y <dbl> 6011.453, 6011.453, 6011.453, 6011.453, 6011.453, 6…
$ data_source <chr> "old_db", "old_db", "old_db", "old_db", "old_db", "…
$ Country <chr> "LV", "LV", "LV", "LV", "LV", "LV", "LV", "LV", "LV…
Add depth
## Add depth
dep_raster <- terra::rast(paste0(home, "/data/Mean depth natural colour (with land).nc"))
class(dep_raster)[1] "SpatRaster"
attr(,"package")
[1] "terra"
crs(dep_raster, proj = TRUE)[1] "+proj=longlat +datum=WGS84 +no_defs"
plot(dep_raster)dat$depth <- terra::extract(dep_raster, dat |> dplyr::select(lon, lat))$elevation
ggplot(dat, aes(lon, lat, color = depth*-1)) +
geom_point()dat$depth <- dat$depth*-1
# TODO: these coordinates are waaay off
# This removes 8349 rows
dat <- dat |> drop_na(depth)
dat |>
ggplot(aes(X*1000, Y*1000, color = depth)) +
geom_point() +
NULLhist(dat$depth)#remove points outside the baltic, i.e. the western most points in:
dat |>
ggplot(aes(lon, lat, color = depth)) +
geom_point() +
geom_vline(xintercept = 13) +
coord_sf()dat <- dat |>
filter( lon > 13)
plot_map_fc +
geom_point(data = dat, aes(X*1000, Y*1000, color = depth), alpha = 0.5) +
theme_sleek(base_size = 6) +
geom_sf()Warning: Removed 3480 rows containing missing values or values outside the scale range
(`geom_point()`).
Add oxy, temp, and sal from SMHI hindcast (and models of Copernicus and hindcast)
# read Hindcast data
hindenv_df <- readRDS(file = paste0(home, "/data/clean/hindcast_1961_2017.rds")) |>
filter(year > 1962) |>
mutate(yearmonth = (year-1963)*12+month)
dat <- dat |> mutate(yearmonth = (year-1963)*12+month)
# for the following values we need to model the hindcast
lateobs_my <- dat |> filter(year > 2017) |> distinct(yearmonth)
# make a pred grid for those late observations
lateobs_predhind <- hindenv_df |>
distinct(lat, lon) |>
replicate_df(time_name = "yearmonth", time_values = pull(lateobs_my)) |>
mutate( model = as_factor("hindcast"),
year = floor(1963+(yearmonth/12)),
month = yearmonth %% 12) |> # mod, i.e. the remainder of an integer divide (here month)
add_utm_columns(ll_names = c("lon", "lat"), utm_crs = 32633)
# Predict using the models for oxy, sal and temp. For reference see 1c_combine_oxysaltemp_data.qmd
# load model for oxygen
Mod_oxy <- readRDS(paste0(home, "/R/prepare-data/Mod_oxy.rds"))
# load model for salinity
Mod_sal <- readRDS(paste0(home, "/R/prepare-data/Mod_sal.rds"))
# load model for temperature
Mod_temp <- readRDS(paste0(home, "/R/prepare-data/Mod_temp.rds"))
oxypreds <- predict(Mod_oxy, newdata = lateobs_predhind) |> mutate(oxy = est) |> dplyr::select(lat, lon, year, month, yearmonth, oxy)
salpreds <- predict(Mod_sal, newdata = lateobs_predhind) |> mutate(sal = est) |> dplyr::select(lat, lon, year, month, yearmonth, sal)
temppreds <- predict(Mod_temp, newdata = lateobs_predhind) |> mutate(temp = est) |> dplyr::select(lat, lon, year, month, yearmonth, temp)
# Combine hindccast preds for 2018-2023 with hindcast (1963-2017)
hindcast_allyears <- left_join(salpreds, oxypreds) |> left_join(temppreds) |>
bind_rows(hindenv_df |> dplyr::select(temp, sal, oxy, lat, lon, year, month, yearmonth))Joining with `by = join_by(lat, lon, year, month, yearmonth)`
Joining with `by = join_by(lat, lon, year, month, yearmonth)`
map(hindcast_allyears, ~sum(is.na(.))) # No NAs$lat
[1] 0
$lon
[1] 0
$year
[1] 0
$month
[1] 0
$yearmonth
[1] 0
$sal
[1] 0
$oxy
[1] 0
$temp
[1] 0
hindcast_allyears |> # for >2018 we only have the months that we need for the observations in dat
summarise(nobs = n(), .by = c(month, year)) |>
arrange(year)# A tibble: 675 × 3
month year nobs
<dbl> <dbl> <int>
1 1 1963 18300
2 2 1963 18300
3 3 1963 18300
4 4 1963 18300
5 5 1963 18300
6 6 1963 18300
7 7 1963 18300
8 8 1963 18300
9 9 1963 18300
10 10 1963 18300
# ℹ 665 more rows
hindcast_allyears |>
summarise(mean_oxy = mean(oxy), .by = c(year, month)) |>
filter(month %in% c(2,3,11)) |>
ggplot() +
geom_line(aes(year, mean_oxy, color = factor(month)), linetype = "dashed") +
geom_line(data = hindenv_df |> filter(month %in% c(2,3,11)) |> summarise(oxy = mean(oxy), .by = c(year, month)), aes(x = year, y = oxy, color = factor(month)))hindcast_allyears |>
summarise(mean_temp = mean(temp), .by = c(year, month)) |>
filter(month %in% c(2,3,11)) |>
ggplot() +
geom_line(aes(year, mean_temp, color = factor(month)), linetype = "dashed") +
geom_line(data = hindenv_df |> filter(month %in% c(2,3,11)) |> summarise(temp = mean(temp), .by = c(year, month)), aes(x = year, y = temp, color = factor(month)))# Now use terra::extract to get oxy, sal and temp values to dat
# function for extraction based on yearmonth
ext_envdat <- function(ayearmonth) {
ext_y = dat |> filter(yearmonth == ayearmonth) |> dplyr::select(lon, lat) # coords from the stomach data, can only be lon and lat for extract()
hindcast_allyears |>
filter(yearmonth == ayearmonth) |>
as_spatraster(xycols = c(2,1), crs = "WGS84", digits = 2) |>
terra::extract(ext_y, method = "bilinear", ID=FALSE) |> # to reduce NaNs produced when points are outside raster extent (i.e. on land)
dplyr::select(oxy, sal, temp) |>
bind_cols(dat |> filter(yearmonth == ayearmonth))
}
dat <- dat |>
mutate(yearmonth = (year-1963)*12+month)
# use the ext_envdat function to extract env covs values to the observations fr hindcast
dat_env <- unique(dat |> pull(yearmonth)) |>
map(\(x) ext_envdat(x)) |>
list_rbind()
# Many observations lie just outside the hincast_allyears raster extent causing NA values
theNAs <- dat_env |> filter(is.na(oxy) | is.na(sal) | is.na(temp))
theNAs |> summarise(n = n(), .by= year) |> arrange(year) year n
1 1967 3
2 1974 158
3 1976 20
4 1977 62
5 1978 48
6 1979 266
7 1980 131
8 1981 8
9 2013 79
# filter out those observations from the data using pred_ID
dat_na <- dat |>
filter(pred_ID %in% theNAs$pred_ID)
# a function to find the closest point with env covs in the env_dat
cov_na_fill <- function(napred_id) {
pid_lly = dat_na |>
filter(pred_ID == napred_id) |> dplyr::select(lon, lat, yearmonth)
pl = distance(pid_lly |> dplyr::select(lon, lat),
hindcast_allyears |> filter(yearmonth == pid_lly$yearmonth) |> dplyr::select(lon,lat),
lonlat = TRUE)
dat_na |>
filter(pred_ID == napred_id) |>
bind_cols(hindcast_allyears[which.min(pl),] |> dplyr::select(oxy,temp,sal) )
}
# apply function to dat_na
dat_na_filled <- dat_na$pred_ID |>
map(\(x) cov_na_fill(x)) |>
list_rbind()
# no NAs left
dat_na_filled |> filter(is.na(oxy) | is.na(sal) | is.na(temp))# A tibble: 0 × 33
# ℹ 33 variables: pred_ID <chr>, herring <dbl>, saduria <dbl>, sprat <dbl>,
# other <dbl>, other_invert <dbl>, other_fish <dbl>, fr_tot <dbl>,
# fr_sad <dbl>, fr_spr <dbl>, fr_her <dbl>, fr_other <dbl>,
# fr_other_invert <dbl>, fr_other_fish <dbl>, year <dbl>, month <dbl>,
# day <dbl>, day_of_year <dbl>, pred_weight <dbl>, pred_length <dbl>,
# pred_weight_source <chr>, lat <dbl>, lon <dbl>, ices_rect <chr>, X <dbl>,
# Y <dbl>, data_source <chr>, Country <chr>, depth <dbl>, yearmonth <dbl>, …
# bind the non-NA observations with the NAs
dat_env_all <- dat_env |>
filter(!is.na(oxy) | !is.na(sal) | !is.na(temp)) |>
bind_rows(dat_na_filled)
# all are there?(!)
dat_env_all |> summarise(n()) == dat |> summarise(n()) n()
[1,] TRUE
sum(is.na(dat_env_all$oxy))[1] 0
# check plots
plot_map_fc +
geom_point(data = dat_env_all, aes(X*1000, Y*1000, color = oxy), alpha = 0.5) +
theme_sleek(base_size = 6) +
geom_sf() Warning: Removed 3480 rows containing missing values or values outside the scale range
(`geom_point()`).
plot_map_fc +
geom_point(data = dat_env_all, aes(X*1000, Y*1000, color = temp), alpha = 0.5) +
theme_sleek(base_size = 6) +
geom_sf() Warning: Removed 3480 rows containing missing values or values outside the scale range
(`geom_point()`).
plot_map_fc +
geom_point(data = dat_env_all, aes(X*1000, Y*1000, color = sal), alpha = 0.5) +
theme_sleek(base_size = 6) +
geom_sf() Warning: Removed 3480 rows containing missing values or values outside the scale range
(`geom_point()`).
dat_env <- dat_env_all |> dplyr::select(-yearmonth)Add cod densities, only for post 1992!
# for the Mod, the fr data is instead used to scale the prediction grid
data_stats <- read_csv(paste0(home, "/data/clean/data_stats.csv")) Rows: 12328 Columns: 43
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): country, haul_id, ices_rect, month_year
dbl (39): year, lat, lon, quarter, month, sub_div, oxy, temp, sal, depth, X,...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
m3 <- readRDS(paste0(home, file = "/R/main-analysis/m3.rds"))
hist(m3$data$density)# d_stats is used to scale the prediction grid when predictiong cod densities
data_stats_st <- data_stats |>
summarise(depth_mean = mean(depth, na.omit = TRUE),
depth_sd = sd(depth),
sal_mean = mean(sal, na.omit = TRUE),
sal_sd = sd(sal),
oxy_mean = mean(oxy, na.omit = TRUE),
oxy_sd = sd(oxy),
temp_mean = mean(temp, na.omit = TRUE),
temp_sd = sd(temp))
df_m3 <- dat_env |>
filter(between(year, 1993, 2021)) |>
drop_na(oxy,
sal,
temp) |>
mutate(depth = ifelse(depth < 0, 0, depth),
depth_sc = (depth - data_stats_st$depth_mean)/data_stats_st$depth_sd,
oxy_sc = (oxy - data_stats_st$oxy_mean)/data_stats_st$oxy_sd,
sal_sc = (sal - data_stats_st$sal_mean)/data_stats_st$sal_sd,
temp_sc = (temp - data_stats_st$temp_mean)/data_stats_st$temp_sd,
depth_sq = depth_sc^2,
temp_sq = temp_sc^2,
year_f = as.factor(year),
quarter_f = as.factor(1))
# Predict cod cpue density with model model from Max
cpue_cod <- predict(m3, newdata = df_m3) |>
mutate(log_density_cod = est) |> # on the log scale Max and Sean says!
dplyr::select(lat, lon, log_density_cod) |>
distinct()
dat_env2 <- dat_env |>
left_join(cpue_cod) Joining with `by = join_by(lat, lon)`
Warning in left_join(dat_env, cpue_cod): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 2 of `x` matches multiple rows in `y`.
ℹ Row 480 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
hist(exp(dat_env2$log_density_cod))Save
# Save data
saveRDS(dat_env2, paste0(home, "/data/clean/stom_env.rds"))